import itertools
from datetime import datetime, timedelta

import elasticapm
from asgiref.sync import sync_to_async
from django.conf import settings
from django.db.models import Exists, Max, OuterRef, Prefetch, Q, Subquery, Value
from django.utils import timezone

from sysreptor.pentests.models import (
    ArchivedProject,
    FindingTemplate,
    FindingTemplateTranslation,
    PentestFinding,
    PentestProject,
    ProjectNotebookPage,
    ProjectType,
    ReportSection,
    UploadedImage,
    UploadedProjectFile,
    UploadedTemplateImage,
    UploadedUserNotebookFile,
    UploadedUserNotebookImage,
    UserNotebookPage,
)
from sysreptor.pentests.models.collab import CollabClientInfo
from sysreptor.pentests.models.files import ProjectNotebookExcalidrawFile
from sysreptor.tasks.models import PeriodicTaskInfo, periodic_task
from sysreptor.users.models import PentestUser
from sysreptor.utils import license
from sysreptor.utils.configuration import configuration
from sysreptor.utils.history import history_context
from sysreptor.utils.utils import groupby_to_dict


@elasticapm.async_capture_span()
@history_context(history_change_reason='Cleanup unreferenced files')
async def cleanup_project_files(task_info: PeriodicTaskInfo):
    """
    Delete uploaded project images and files that their project does not reference.

    Only projects created more than 2 days ago and uploads last updated more than 2 days ago are considered.
    If the task succeeded before, only projects are checked where the project itself or one of its
    findings, sections or notes was updated after the previous successful run (looking back at least 3 days).

    :param task_info: info of the running periodic task; task_info.model.last_success is read
    """
    # Leave recent uploads alone to prevent race conditions:
    # upload -> cleanup -> save text with reference -> referenced file already deleted
    cutoff = timezone.now() - timedelta(days=2)
    stale_images = UploadedImage.objects.filter(updated__lt=cutoff)
    stale_files = UploadedProjectFile.objects.filter(updated__lt=cutoff)
    candidates = PentestProject.objects \
        .filter(created__lt=cutoff) \
        .select_related('project_type') \
        .prefetch_related(
            'findings',
            'sections',
            'notes',
            Prefetch('images', stale_images, to_attr='images_cleanup'),
            Prefetch('files', stale_files, to_attr='files_cleanup'),
        )

    # Restrict to projects that changed since the last cleanup
    previous_success = task_info.model.last_success
    if previous_success:
        changed_since = min(previous_success, cutoff - timedelta(days=1))
        changed_project_ids = candidates \
            .filter(
                Q(updated__gt=changed_since) |
                Q(findings__updated__gt=changed_since) |
                Q(sections__updated__gt=changed_since) |
                Q(notes__updated__gt=changed_since),
            ) \
            .values_list('id')
        candidates = candidates.filter(id__in=changed_project_ids)

    # The reference check has to happen in python because of DB encryption
    unreferenced_images = []
    unreferenced_files = []
    async for project in candidates:
        unreferenced_images.extend([img for img in project.images_cleanup if not project.is_file_referenced(img)])
        unreferenced_files.extend([file for file in project.files_cleanup if not project.is_file_referenced(file)])

    if unreferenced_images:
        image_pks = [img.pk for img in unreferenced_images]
        await UploadedImage.objects.filter(pk__in=image_pks).adelete()
    if unreferenced_files:
        file_pks = [file.pk for file in unreferenced_files]
        await UploadedProjectFile.objects.filter(pk__in=file_pks).adelete()


@elasticapm.async_capture_span()
async def cleanup_usernotebook_files(task_info: PeriodicTaskInfo):
    """
    Delete images and files uploaded to user notebooks that the user does not reference.

    Only users created more than 2 days ago and uploads last updated more than 2 days ago are considered.
    Users are only checked if they have notes and at least one such upload. If the task succeeded before,
    only notes updated after the previous successful run (looking back at least 3 days) count.

    :param task_info: info of the running periodic task; task_info.model.last_success is read
    """
    cutoff = timezone.now() - timedelta(days=2)

    stale_images = UploadedUserNotebookImage.objects.filter(updated__lt=cutoff)
    stale_files = UploadedUserNotebookFile.objects.filter(updated__lt=cutoff)

    notes_of_user = UserNotebookPage.objects.filter(user=OuterRef('pk'))
    previous_success = task_info.model.last_success
    if previous_success:
        changed_since = min(previous_success, cutoff - timedelta(days=1))
        notes_of_user = notes_of_user.filter(updated__gt=changed_since)

    has_stale_images = Exists(stale_images.filter(linked_object=OuterRef('pk')))
    has_stale_files = Exists(stale_files.filter(linked_object=OuterRef('pk')))
    candidates = PentestUser.objects \
        .filter(created__lt=cutoff) \
        .annotate(has_notes=Exists(notes_of_user)) \
        .annotate(has_files=Q(has_stale_images) | Q(has_stale_files)) \
        .filter(has_notes=True, has_files=True) \
        .prefetch_related(
            'notes',
            Prefetch('images', stale_images, to_attr='images_cleanup'),
            Prefetch('files', stale_files, to_attr='files_cleanup'),
        )

    # Collect all uploads that the owning user does not reference
    unreferenced_images = []
    unreferenced_files = []
    async for user in candidates:
        unreferenced_images.extend([img for img in user.images_cleanup if not user.is_file_referenced(img)])
        unreferenced_files.extend([file for file in user.files_cleanup if not user.is_file_referenced(file)])

    if unreferenced_images:
        image_pks = [img.pk for img in unreferenced_images]
        await UploadedUserNotebookImage.objects.filter(pk__in=image_pks).adelete()
    if unreferenced_files:
        file_pks = [file.pk for file in unreferenced_files]
        await UploadedUserNotebookFile.objects.filter(pk__in=file_pks).adelete()


@elasticapm.async_capture_span()
async def cleanup_template_files(task_info: PeriodicTaskInfo):
    """
    Delete images uploaded to finding templates that the template does not reference.

    Only images last updated more than 2 days ago are considered. If the task succeeded before,
    only templates are checked where the template itself or one of its translations was updated
    after the previous successful run (looking back at least 3 days).

    :param task_info: info of the running periodic task; task_info.model.last_success is read
    """
    cutoff = timezone.now() - timedelta(days=2)

    stale_images = UploadedTemplateImage.objects.filter(updated__lt=cutoff)
    has_stale_images = Exists(stale_images.filter(linked_object=OuterRef('pk')))
    candidates = FindingTemplate.objects \
        .annotate(has_files=has_stale_images) \
        .filter(has_files=True) \
        .prefetch_related(
            'translations',
            Prefetch('images', stale_images, to_attr='images_cleanup'),
        )

    # Restrict to templates that changed since the last cleanup
    previous_success = task_info.model.last_success
    if previous_success:
        changed_since = min(previous_success, cutoff - timedelta(days=1))
        changed_template_ids = candidates \
            .filter(
                Q(updated__gt=changed_since) |
                Q(translations__updated__gt=changed_since),
            ) \
            .values_list('id')
        candidates = candidates.filter(id__in=changed_template_ids)

    unreferenced_images = []
    async for template in candidates:
        unreferenced_images.extend([img for img in template.images_cleanup if not template.is_file_referenced(img)])

    if unreferenced_images:
        image_pks = [img.pk for img in unreferenced_images]
        await UploadedTemplateImage.objects.filter(pk__in=image_pks).adelete()


@periodic_task(id='cleanup_unreferenced_images_and_files', schedule=timedelta(days=1))
async def cleanup_unreferenced_images_and_files(task_info: PeriodicTaskInfo):
    """
    Periodic task (scheduled daily) that runs the cleanup of unreferenced uploads
    for projects, user notebooks and finding templates, one after the other.

    :param task_info: info of the running periodic task, passed on to every cleanup step
    """
    cleanup_steps = (
        cleanup_project_files,
        cleanup_usernotebook_files,
        cleanup_template_files,
    )
    # Run sequentially in this order
    for cleanup_step in cleanup_steps:
        await cleanup_step(task_info)


@periodic_task(id='reset_stale_archive_restores', schedule=timedelta(days=1))
async def reset_stale_archive_restores(task_info):
    """
    Removes decrypted shamir keys from the database when an archive restore is stale,
    i.e. some users decrypted their key parts, but some are still missing.
    A restore counts as stale when the latest decryption of any key part of the archived project is
    longer ago than settings.AUTOMATICALLY_RESET_STALE_ARCHIVE_RESTORES_AFTER.
    This prevents decrypted shamir keys from being stored in the DB forever.
    """
    from sysreptor.pentests.models import ArchivedProjectKeyPart

    # Latest decryption timestamp over all key parts of the same archived project
    latest_decryption = ArchivedProjectKeyPart.objects \
        .filter(archived_project=OuterRef('archived_project')) \
        .values('archived_project') \
        .annotate(last_decrypted=Max('decrypted_at')) \
        .values_list('last_decrypted')
    stale_before = timezone.now() - settings.AUTOMATICALLY_RESET_STALE_ARCHIVE_RESTORES_AFTER

    stale_key_parts = ArchivedProjectKeyPart.objects \
        .filter(decrypted_at__isnull=False) \
        .annotate(last_decrypted=Subquery(latest_decryption)) \
        .filter(last_decrypted__lt=stale_before)
    # Reset the decrypted state of the affected key parts
    await stale_key_parts.aupdate(decrypted_at=None, key_part=None)


@periodic_task(id='automatically_archive_projects', schedule=timedelta(days=1))
async def automatically_archive_projects(task_info):
    """
    Periodic task (scheduled daily) that archives projects which have been readonly for longer than
    the number of days configured in AUTOMATICALLY_ARCHIVE_PROJECTS_AFTER.

    Does nothing when the setting is not set or license.ais_professional() is not fulfilled.
    """
    archive_after_days = await configuration.aget('AUTOMATICALLY_ARCHIVE_PROJECTS_AFTER')
    if not archive_after_days:
        return
    if not await license.ais_professional():
        return

    archivable_projects = await sync_to_async(PentestProject.objects.only_archivable)()
    readonly_before = timezone.now() - timedelta(days=archive_after_days)
    projects_to_archive = archivable_projects \
        .filter(readonly=True) \
        .filter(readonly_since__lt=readonly_before)

    # create_from_project is a sync function, wrap it for use in async context
    archive_project = sync_to_async(ArchivedProject.objects.create_from_project)
    async for project in projects_to_archive:
        await archive_project(project)


@periodic_task(id='automatically_delete_archived_projects', schedule=timedelta(days=1))
async def automatically_delete_archived_projects(task_info):
    """
    Periodic task (scheduled daily) that deletes archived projects which were created more than
    the number of days configured in AUTOMATICALLY_DELETE_ARCHIVED_PROJECTS_AFTER ago.

    Does nothing when the setting is not set or license.ais_professional() is not fulfilled.
    """
    delete_after_days = await configuration.aget('AUTOMATICALLY_DELETE_ARCHIVED_PROJECTS_AFTER')
    if not delete_after_days:
        return
    if not await license.ais_professional():
        return

    created_before = timezone.now() - timedelta(days=delete_after_days)
    expired_archives = ArchivedProject.objects.filter(created__lt=created_before)
    await expired_archives.adelete()


async def perform_history_cleanup(model, to_cleanup):
    """
    Delete the given history entries of a model and empty the passed list.

    :param model: model class whose history manager (model.history) is used for deletion
    :param to_cleanup: list of history entries (dicts with a history_id key) to delete;
        cleared in place after deletion, so the caller can reuse it
    """
    if not to_cleanup:
        return

    history_ids = [entry['history_id'] for entry in to_cleanup]
    await model.history.filter(history_id__in=history_ids).adelete()
    to_cleanup.clear()


def get_instance_histories_to_cleanup(instance_histories):
    """
    Select the history entries of a single instance that can be deleted.

    History entries are thinned out depending on their age: per user only one entry per timeframe
    (and the last entry before a pause of at least one timeframe) is kept. Older entries use larger timeframes.
    Always kept are: the earliest and latest entry of the instance, entries with history_prevent_cleanup set,
    entries whose history_type is not '~', the latest entry of each user per cleanup window
    and all entries of the newest age bucket (newer than 5 minutes).

    :param instance_histories: list of history entries (dicts with the keys history_id, history_date, history_type,
        history_user_id and history_prevent_cleanup) of one instance; expected to be sorted ascending by history_date.
        The list is emptied before returning, so the caller can reuse it for the next instance.
    :return: list of the history entries that are not protected and can be deleted
    """
    # Nothing to clean up for an empty list (indexing the first/last entry below would raise IndexError)
    if not instance_histories:
        return []

    # Use a single reference time, such that all age thresholds are consistent with each other
    now = timezone.now()
    max_timeframe = settings.SIMPLE_HISTORY_CLEANUP_TIMEFRAME
    # Tuples of (older_than, timeframe), ordered from the oldest to the newest threshold:
    # entries up to older_than are reduced to one entry per user per timeframe
    cleanup_timeframes = [
        (now - timedelta(days=1), max_timeframe),
        (now - timedelta(hours=2), min(timedelta(minutes=30), max_timeframe)),
        (now - timedelta(minutes=5), min(timedelta(minutes=10), max_timeframe)),
        # Do not clean up newer history entries: with a timeframe of 0 every entry is kept
        (now + timedelta(days=1), timedelta(minutes=0)),
    ]

    # Do not cleanup the earliest and latest history entry
    prevent_cleanup = {
        instance_histories[0]['history_id'],
        instance_histories[-1]['history_id'],
    }

    bucket_start = 0
    for older_than, timeframe in cleanup_timeframes:
        # Consume the next entries (sorted by date) that fall into this age bucket
        bucket = list(itertools.takewhile(
            lambda h: h['history_date'] <= older_than,  # noqa: B023
            instance_histories[bucket_start:],
        ))
        bucket_start += len(bucket)

        # Split history entries into windows for time-based cleanup.
        # A window ends with an entry that must be kept because of its properties.
        cleanup_windows = []
        current_window = []
        for h in bucket:
            current_window.append(h)
            if h['history_prevent_cleanup'] or h['history_type'] != '~':
                prevent_cleanup.add(h['history_id'])
                cleanup_windows.append(current_window)
                current_window = []
        if current_window:
            cleanup_windows.append(current_window)

        # Time-based cleanup per window per user
        for window in cleanup_windows:
            for user_entries in groupby_to_dict(window, lambda h: str(h['history_user_id'])).values():
                # Keep latest history of user
                prevent_cleanup.add(user_entries[-1]['history_id'])

                prev = user_entries[0]
                timeframe_start = prev['history_date']
                for h in user_entries:
                    # Keep the latest entry before a pause and 1 entry per timeframe
                    paused = (h['history_date'] - prev['history_date']) >= timeframe
                    timeframe_elapsed = (h['history_date'] - timeframe_start) >= timeframe
                    if paused or timeframe_elapsed:
                        prevent_cleanup.add(prev['history_id'])
                        timeframe_start = h['history_date']
                    prev = h

    to_cleanup = [h for h in instance_histories if h['history_id'] not in prevent_cleanup]
    # Callers rely on the list being emptied for the next instance
    instance_histories.clear()
    return to_cleanup


@periodic_task(id='cleanup_history', schedule=timedelta(minutes=5))
async def cleanup_history(task_info):
    """
    Cleanup history entries of frequently changing models (e.g. because of auto save)
    to have a concise history timeline.

    History entries newer than 2 days before the last successful run (or all entries, if the task never
    succeeded) of all handled models are loaded ordered by model, instance id and date.
    The entries of each instance are passed to get_instance_histories_to_cleanup and the
    selected entries are deleted in batches per model.

    :param task_info: info of the running periodic task; task_info.model.last_success is read
    """
    # Do not cleanup infrequently changing models: files/images/assets, ProjectMemberInfo
    history_models = [
        PentestProject,
        ReportSection,
        PentestFinding,
        ProjectNotebookPage,
        ProjectNotebookExcalidrawFile,
        ProjectType,
        FindingTemplate,
        FindingTemplateTranslation,
    ]
    # The history_model annotation contains the class name; map it back to the model class
    models_by_name = {m.__name__: m for m in history_models}

    if task_info.model.last_success:
        cleanup_time_start = task_info.model.last_success - timedelta(days=2)
    else:
        cleanup_time_start = timezone.make_aware(datetime.min)

    # Combine the history entries of all models into a single query
    queryset = None
    for m in history_models:
        qs = m.history \
            .filter(history_date__gt=cleanup_time_start) \
            .annotate(history_model=Value(m.__name__)) \
            .values('history_id', 'history_date', 'history_type', 'history_user_id', 'history_prevent_cleanup', 'history_model', 'id')
        if queryset is None:
            queryset = qs
        else:
            queryset = queryset.union(qs)
    queryset = queryset.order_by('history_model', 'id', 'history_date')

    prev = None
    to_cleanup = []
    instance_histories = []
    async for h in queryset.aiterator():
        model_changed = prev is not None and prev['history_model'] != h['history_model']
        # An instance is identified by model and id: a model change always starts a new instance,
        # even if two instances of different models happen to share the same id
        instance_changed = prev is not None and (model_changed or prev['id'] != h['id'])

        # On instance changed: collect entries to cleanup of the previous instance (empties instance_histories)
        if instance_changed:
            to_cleanup.extend(get_instance_histories_to_cleanup(instance_histories))

        # On model changed or batch size exceeded: delete collected entries (empties to_cleanup)
        if model_changed or len(to_cleanup) > 1000:
            await perform_history_cleanup(models_by_name[prev['history_model']], to_cleanup)

        instance_histories.append(h)
        prev = h

    # Handle the last instance
    if prev:
        to_cleanup.extend(get_instance_histories_to_cleanup(instance_histories))
        await perform_history_cleanup(models_by_name[prev['history_model']], to_cleanup)


@periodic_task(id='cleanup_collab_events', schedule=timedelta(hours=1))
async def cleanup_collab_events(task_info):
    """
    Periodic task (scheduled hourly) that deletes collab events created more than 1 hour ago
    and collab client infos that were not updated for more than 1 hour.
    """
    from sysreptor.pentests.models import CollabEvent

    max_age = timedelta(hours=1)

    # Old events are not needed anymore
    expired_events = CollabEvent.objects.filter(created__lt=timezone.now() - max_age)
    await expired_events.adelete()

    # Stale client infos might remain if they were not deleted on client disconnect
    stale_client_infos = CollabClientInfo.objects.filter(updated__lt=timezone.now() - max_age)
    await stale_client_infos.adelete()
